{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c2efd5b6",
   "metadata": {},
   "source": [
    "### Pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "718bf797",
   "metadata": {},
   "outputs": [],
   "source": [
    "# import modules\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "3c938e3e",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 2 µs, sys: 1 µs, total: 3 µs\n",
      "Wall time: 6.2 µs\n",
      "(335484, 13)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>removed</th>\n",
       "      <th>log_index</th>\n",
       "      <th>transaction_index</th>\n",
       "      <th>transaction_hash</th>\n",
       "      <th>block_hash</th>\n",
       "      <th>block_number</th>\n",
       "      <th>address</th>\n",
       "      <th>data</th>\n",
       "      <th>topic_0</th>\n",
       "      <th>topic_1</th>\n",
       "      <th>topic_2</th>\n",
       "      <th>topic_3</th>\n",
       "      <th>block_timestamp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0xdd6df5c66ca36a2fc7eb5aa53ca0fda5e752f8f5a02f...</td>\n",
       "      <td>0x99a194c70a1da06d9b814168c839d960afd65588c93f...</td>\n",
       "      <td>12738509</td>\n",
       "      <td>0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2</td>\n",
       "      <td>0x00000000000000000000000000000000000000000000...</td>\n",
       "      <td>0x8c5be1e5ebec7d5bd14f71427d1e84f3dd0314c0f7b2...</td>\n",
       "      <td>0x00000000000000000000000049e38025ce640e8b0d70...</td>\n",
       "      <td>0x000000000000000000000000e592427a0aece92de3ed...</td>\n",
       "      <td>None</td>\n",
       "      <td>1625097609</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0xdd6df5c66ca36a2fc7eb5aa53ca0fda5e752f8f5a02f...</td>\n",
       "      <td>0x99a194c70a1da06d9b814168c839d960afd65588c93f...</td>\n",
       "      <td>12738509</td>\n",
       "      <td>0x2260FAC5E5542a773Aa44fBCfeDf7C193bc2C599</td>\n",
       "      <td>0x00000000000000000000000000000000000000000000...</td>\n",
       "      <td>0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4...</td>\n",
       "      <td>0x0000000000000000000000004585fe77225b41b697c9...</td>\n",
       "      <td>0x00000000000000000000000049e38025ce640e8b0d70...</td>\n",
       "      <td>None</td>\n",
       "      <td>1625097609</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0xdd6df5c66ca36a2fc7eb5aa53ca0fda5e752f8f5a02f...</td>\n",
       "      <td>0x99a194c70a1da06d9b814168c839d960afd65588c93f...</td>\n",
       "      <td>12738509</td>\n",
       "      <td>0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2</td>\n",
       "      <td>0x00000000000000000000000000000000000000000000...</td>\n",
       "      <td>0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4...</td>\n",
       "      <td>0x00000000000000000000000049e38025ce640e8b0d70...</td>\n",
       "      <td>0x0000000000000000000000004585fe77225b41b697c9...</td>\n",
       "      <td>None</td>\n",
       "      <td>1625097609</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>False</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0xdd6df5c66ca36a2fc7eb5aa53ca0fda5e752f8f5a02f...</td>\n",
       "      <td>0x99a194c70a1da06d9b814168c839d960afd65588c93f...</td>\n",
       "      <td>12738509</td>\n",
       "      <td>0x4585FE77225b41b697C938B018E2Ac67Ac5a20c0</td>\n",
       "      <td>0xffffffffffffffffffffffffffffffffffffffffffff...</td>\n",
       "      <td>0xc42079f94a6350d7e6235f29174924f928cc2ac818eb...</td>\n",
       "      <td>0x000000000000000000000000e592427a0aece92de3ed...</td>\n",
       "      <td>0x00000000000000000000000049e38025ce640e8b0d70...</td>\n",
       "      <td>None</td>\n",
       "      <td>1625097609</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0x106dc23545f27274895c5dbb0df934cbdb8b73bc6b56...</td>\n",
       "      <td>0x99a194c70a1da06d9b814168c839d960afd65588c93f...</td>\n",
       "      <td>12738509</td>\n",
       "      <td>0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48</td>\n",
       "      <td>0x00000000000000000000000000000000000000000000...</td>\n",
       "      <td>0x8c5be1e5ebec7d5bd14f71427d1e84f3dd0314c0f7b2...</td>\n",
       "      <td>0x000000000000000000000000a97810f352914703041b...</td>\n",
       "      <td>0x000000000000000000000000e592427a0aece92de3ed...</td>\n",
       "      <td>None</td>\n",
       "      <td>1625097609</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   removed  log_index  transaction_index  \\\n",
       "0    False          0                  0   \n",
       "1    False          1                  0   \n",
       "2    False          2                  0   \n",
       "3    False          3                  0   \n",
       "4    False          0                  1   \n",
       "\n",
       "                                    transaction_hash  \\\n",
       "0  0xdd6df5c66ca36a2fc7eb5aa53ca0fda5e752f8f5a02f...   \n",
       "1  0xdd6df5c66ca36a2fc7eb5aa53ca0fda5e752f8f5a02f...   \n",
       "2  0xdd6df5c66ca36a2fc7eb5aa53ca0fda5e752f8f5a02f...   \n",
       "3  0xdd6df5c66ca36a2fc7eb5aa53ca0fda5e752f8f5a02f...   \n",
       "4  0x106dc23545f27274895c5dbb0df934cbdb8b73bc6b56...   \n",
       "\n",
       "                                          block_hash  block_number  \\\n",
       "0  0x99a194c70a1da06d9b814168c839d960afd65588c93f...      12738509   \n",
       "1  0x99a194c70a1da06d9b814168c839d960afd65588c93f...      12738509   \n",
       "2  0x99a194c70a1da06d9b814168c839d960afd65588c93f...      12738509   \n",
       "3  0x99a194c70a1da06d9b814168c839d960afd65588c93f...      12738509   \n",
       "4  0x99a194c70a1da06d9b814168c839d960afd65588c93f...      12738509   \n",
       "\n",
       "                                      address  \\\n",
       "0  0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2   \n",
       "1  0x2260FAC5E5542a773Aa44fBCfeDf7C193bc2C599   \n",
       "2  0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2   \n",
       "3  0x4585FE77225b41b697C938B018E2Ac67Ac5a20c0   \n",
       "4  0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48   \n",
       "\n",
       "                                                data  \\\n",
       "0  0x00000000000000000000000000000000000000000000...   \n",
       "1  0x00000000000000000000000000000000000000000000...   \n",
       "2  0x00000000000000000000000000000000000000000000...   \n",
       "3  0xffffffffffffffffffffffffffffffffffffffffffff...   \n",
       "4  0x00000000000000000000000000000000000000000000...   \n",
       "\n",
       "                                             topic_0  \\\n",
       "0  0x8c5be1e5ebec7d5bd14f71427d1e84f3dd0314c0f7b2...   \n",
       "1  0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4...   \n",
       "2  0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4...   \n",
       "3  0xc42079f94a6350d7e6235f29174924f928cc2ac818eb...   \n",
       "4  0x8c5be1e5ebec7d5bd14f71427d1e84f3dd0314c0f7b2...   \n",
       "\n",
       "                                             topic_1  \\\n",
       "0  0x00000000000000000000000049e38025ce640e8b0d70...   \n",
       "1  0x0000000000000000000000004585fe77225b41b697c9...   \n",
       "2  0x00000000000000000000000049e38025ce640e8b0d70...   \n",
       "3  0x000000000000000000000000e592427a0aece92de3ed...   \n",
       "4  0x000000000000000000000000a97810f352914703041b...   \n",
       "\n",
       "                                             topic_2 topic_3  block_timestamp  \n",
       "0  0x000000000000000000000000e592427a0aece92de3ed...    None       1625097609  \n",
       "1  0x00000000000000000000000049e38025ce640e8b0d70...    None       1625097609  \n",
       "2  0x0000000000000000000000004585fe77225b41b697c9...    None       1625097609  \n",
       "3  0x00000000000000000000000049e38025ce640e8b0d70...    None       1625097609  \n",
       "4  0x000000000000000000000000e592427a0aece92de3ed...    None       1625097609  "
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# import demo files\n",
    "%time\n",
    "pdemo_df = pd.read_parquet('../data/chp3_dfile.parquet')\n",
    "print(pdemo_df.shape)\n",
    "pdemo_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "03f98ef4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs\n",
      "Wall time: 6.91 µs\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>removed</th>\n",
       "      <th>log_index</th>\n",
       "      <th>transaction_index</th>\n",
       "      <th>transaction_hash</th>\n",
       "      <th>block_hash</th>\n",
       "      <th>address</th>\n",
       "      <th>data</th>\n",
       "      <th>topic_0</th>\n",
       "      <th>topic_1</th>\n",
       "      <th>topic_2</th>\n",
       "      <th>topic_3</th>\n",
       "      <th>block_timestamp</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>block_number</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>12738509</th>\n",
       "      <td>147</td>\n",
       "      <td>147</td>\n",
       "      <td>147</td>\n",
       "      <td>147</td>\n",
       "      <td>147</td>\n",
       "      <td>147</td>\n",
       "      <td>147</td>\n",
       "      <td>147</td>\n",
       "      <td>128</td>\n",
       "      <td>118</td>\n",
       "      <td>13</td>\n",
       "      <td>147</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12738510</th>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>349</td>\n",
       "      <td>289</td>\n",
       "      <td>46</td>\n",
       "      <td>383</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12738511</th>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>354</td>\n",
       "      <td>284</td>\n",
       "      <td>33</td>\n",
       "      <td>383</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12738512</th>\n",
       "      <td>389</td>\n",
       "      <td>389</td>\n",
       "      <td>389</td>\n",
       "      <td>389</td>\n",
       "      <td>389</td>\n",
       "      <td>389</td>\n",
       "      <td>389</td>\n",
       "      <td>389</td>\n",
       "      <td>349</td>\n",
       "      <td>313</td>\n",
       "      <td>25</td>\n",
       "      <td>389</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12738513</th>\n",
       "      <td>425</td>\n",
       "      <td>425</td>\n",
       "      <td>425</td>\n",
       "      <td>425</td>\n",
       "      <td>425</td>\n",
       "      <td>425</td>\n",
       "      <td>425</td>\n",
       "      <td>425</td>\n",
       "      <td>380</td>\n",
       "      <td>332</td>\n",
       "      <td>43</td>\n",
       "      <td>425</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              removed  log_index  transaction_index  transaction_hash  \\\n",
       "block_number                                                            \n",
       "12738509          147        147                147               147   \n",
       "12738510          383        383                383               383   \n",
       "12738511          383        383                383               383   \n",
       "12738512          389        389                389               389   \n",
       "12738513          425        425                425               425   \n",
       "\n",
       "              block_hash  address  data  topic_0  topic_1  topic_2  topic_3  \\\n",
       "block_number                                                                  \n",
       "12738509             147      147   147      147      128      118       13   \n",
       "12738510             383      383   383      383      349      289       46   \n",
       "12738511             383      383   383      383      354      284       33   \n",
       "12738512             389      389   389      389      349      313       25   \n",
       "12738513             425      425   425      425      380      332       43   \n",
       "\n",
       "              block_timestamp  \n",
       "block_number                   \n",
       "12738509                  147  \n",
       "12738510                  383  \n",
       "12738511                  383  \n",
       "12738512                  389  \n",
       "12738513                  425  "
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# aggregate data by block number\n",
    "%time\n",
    "pdemo_df.groupby('block_number').count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "3c8d2a91",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs\n",
      "Wall time: 5.96 µs\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0        0\n",
       "1        1\n",
       "2        2\n",
       "3        3\n",
       "4        4\n",
       "      ... \n",
       "593    593\n",
       "594    594\n",
       "595    595\n",
       "596    596\n",
       "597    597\n",
       "Length: 598, dtype: int32"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# return Series of unique index logs\n",
    "%time\n",
    "pd.Series(pdemo_df['log_index'].unique())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1df02fe0",
   "metadata": {},
   "source": [
    "## Dask"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "3c122181",
   "metadata": {},
   "outputs": [],
   "source": [
    "# import modules\n",
    "import dask.dataframe as dd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "d8a5a451",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 2 µs, sys: 1 µs, total: 3 µs\n",
      "Wall time: 6.91 µs\n",
      "(Delayed('int-c855a889-ef18-4e83-8e90-6d69d061d742'), 13)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>removed</th>\n",
       "      <th>log_index</th>\n",
       "      <th>transaction_index</th>\n",
       "      <th>transaction_hash</th>\n",
       "      <th>block_hash</th>\n",
       "      <th>block_number</th>\n",
       "      <th>address</th>\n",
       "      <th>data</th>\n",
       "      <th>topic_0</th>\n",
       "      <th>topic_1</th>\n",
       "      <th>topic_2</th>\n",
       "      <th>topic_3</th>\n",
       "      <th>block_timestamp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0xdd6df5c66ca36a2fc7eb5aa53ca0fda5e752f8f5a02f...</td>\n",
       "      <td>0x99a194c70a1da06d9b814168c839d960afd65588c93f...</td>\n",
       "      <td>12738509</td>\n",
       "      <td>0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2</td>\n",
       "      <td>0x00000000000000000000000000000000000000000000...</td>\n",
       "      <td>0x8c5be1e5ebec7d5bd14f71427d1e84f3dd0314c0f7b2...</td>\n",
       "      <td>0x00000000000000000000000049e38025ce640e8b0d70...</td>\n",
       "      <td>0x000000000000000000000000e592427a0aece92de3ed...</td>\n",
       "      <td>None</td>\n",
       "      <td>1625097609</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0xdd6df5c66ca36a2fc7eb5aa53ca0fda5e752f8f5a02f...</td>\n",
       "      <td>0x99a194c70a1da06d9b814168c839d960afd65588c93f...</td>\n",
       "      <td>12738509</td>\n",
       "      <td>0x2260FAC5E5542a773Aa44fBCfeDf7C193bc2C599</td>\n",
       "      <td>0x00000000000000000000000000000000000000000000...</td>\n",
       "      <td>0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4...</td>\n",
       "      <td>0x0000000000000000000000004585fe77225b41b697c9...</td>\n",
       "      <td>0x00000000000000000000000049e38025ce640e8b0d70...</td>\n",
       "      <td>None</td>\n",
       "      <td>1625097609</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0xdd6df5c66ca36a2fc7eb5aa53ca0fda5e752f8f5a02f...</td>\n",
       "      <td>0x99a194c70a1da06d9b814168c839d960afd65588c93f...</td>\n",
       "      <td>12738509</td>\n",
       "      <td>0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2</td>\n",
       "      <td>0x00000000000000000000000000000000000000000000...</td>\n",
       "      <td>0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4...</td>\n",
       "      <td>0x00000000000000000000000049e38025ce640e8b0d70...</td>\n",
       "      <td>0x0000000000000000000000004585fe77225b41b697c9...</td>\n",
       "      <td>None</td>\n",
       "      <td>1625097609</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>False</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0xdd6df5c66ca36a2fc7eb5aa53ca0fda5e752f8f5a02f...</td>\n",
       "      <td>0x99a194c70a1da06d9b814168c839d960afd65588c93f...</td>\n",
       "      <td>12738509</td>\n",
       "      <td>0x4585FE77225b41b697C938B018E2Ac67Ac5a20c0</td>\n",
       "      <td>0xffffffffffffffffffffffffffffffffffffffffffff...</td>\n",
       "      <td>0xc42079f94a6350d7e6235f29174924f928cc2ac818eb...</td>\n",
       "      <td>0x000000000000000000000000e592427a0aece92de3ed...</td>\n",
       "      <td>0x00000000000000000000000049e38025ce640e8b0d70...</td>\n",
       "      <td>None</td>\n",
       "      <td>1625097609</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0x106dc23545f27274895c5dbb0df934cbdb8b73bc6b56...</td>\n",
       "      <td>0x99a194c70a1da06d9b814168c839d960afd65588c93f...</td>\n",
       "      <td>12738509</td>\n",
       "      <td>0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48</td>\n",
       "      <td>0x00000000000000000000000000000000000000000000...</td>\n",
       "      <td>0x8c5be1e5ebec7d5bd14f71427d1e84f3dd0314c0f7b2...</td>\n",
       "      <td>0x000000000000000000000000a97810f352914703041b...</td>\n",
       "      <td>0x000000000000000000000000e592427a0aece92de3ed...</td>\n",
       "      <td>None</td>\n",
       "      <td>1625097609</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   removed  log_index  transaction_index  \\\n",
       "0    False          0                  0   \n",
       "1    False          1                  0   \n",
       "2    False          2                  0   \n",
       "3    False          3                  0   \n",
       "4    False          0                  1   \n",
       "\n",
       "                                    transaction_hash  \\\n",
       "0  0xdd6df5c66ca36a2fc7eb5aa53ca0fda5e752f8f5a02f...   \n",
       "1  0xdd6df5c66ca36a2fc7eb5aa53ca0fda5e752f8f5a02f...   \n",
       "2  0xdd6df5c66ca36a2fc7eb5aa53ca0fda5e752f8f5a02f...   \n",
       "3  0xdd6df5c66ca36a2fc7eb5aa53ca0fda5e752f8f5a02f...   \n",
       "4  0x106dc23545f27274895c5dbb0df934cbdb8b73bc6b56...   \n",
       "\n",
       "                                          block_hash  block_number  \\\n",
       "0  0x99a194c70a1da06d9b814168c839d960afd65588c93f...      12738509   \n",
       "1  0x99a194c70a1da06d9b814168c839d960afd65588c93f...      12738509   \n",
       "2  0x99a194c70a1da06d9b814168c839d960afd65588c93f...      12738509   \n",
       "3  0x99a194c70a1da06d9b814168c839d960afd65588c93f...      12738509   \n",
       "4  0x99a194c70a1da06d9b814168c839d960afd65588c93f...      12738509   \n",
       "\n",
       "                                      address  \\\n",
       "0  0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2   \n",
       "1  0x2260FAC5E5542a773Aa44fBCfeDf7C193bc2C599   \n",
       "2  0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2   \n",
       "3  0x4585FE77225b41b697C938B018E2Ac67Ac5a20c0   \n",
       "4  0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48   \n",
       "\n",
       "                                                data  \\\n",
       "0  0x00000000000000000000000000000000000000000000...   \n",
       "1  0x00000000000000000000000000000000000000000000...   \n",
       "2  0x00000000000000000000000000000000000000000000...   \n",
       "3  0xffffffffffffffffffffffffffffffffffffffffffff...   \n",
       "4  0x00000000000000000000000000000000000000000000...   \n",
       "\n",
       "                                             topic_0  \\\n",
       "0  0x8c5be1e5ebec7d5bd14f71427d1e84f3dd0314c0f7b2...   \n",
       "1  0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4...   \n",
       "2  0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4...   \n",
       "3  0xc42079f94a6350d7e6235f29174924f928cc2ac818eb...   \n",
       "4  0x8c5be1e5ebec7d5bd14f71427d1e84f3dd0314c0f7b2...   \n",
       "\n",
       "                                             topic_1  \\\n",
       "0  0x00000000000000000000000049e38025ce640e8b0d70...   \n",
       "1  0x0000000000000000000000004585fe77225b41b697c9...   \n",
       "2  0x00000000000000000000000049e38025ce640e8b0d70...   \n",
       "3  0x000000000000000000000000e592427a0aece92de3ed...   \n",
       "4  0x000000000000000000000000a97810f352914703041b...   \n",
       "\n",
       "                                             topic_2 topic_3  block_timestamp  \n",
       "0  0x000000000000000000000000e592427a0aece92de3ed...    None       1625097609  \n",
       "1  0x00000000000000000000000049e38025ce640e8b0d70...    None       1625097609  \n",
       "2  0x0000000000000000000000004585fe77225b41b697c9...    None       1625097609  \n",
       "3  0x00000000000000000000000049e38025ce640e8b0d70...    None       1625097609  \n",
       "4  0x000000000000000000000000e592427a0aece92de3ed...    None       1625097609  "
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# import demo data\n",
    "%time\n",
    "ddemo_df = dd.read_parquet('../data/chp3_dfile.parquet')\n",
    "print(ddemo_df.shape)\n",
    "ddemo_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "ea977b47",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 3 µs, sys: 2 µs, total: 5 µs\n",
      "Wall time: 6.91 µs\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>removed</th>\n",
       "      <th>log_index</th>\n",
       "      <th>transaction_index</th>\n",
       "      <th>transaction_hash</th>\n",
       "      <th>block_hash</th>\n",
       "      <th>address</th>\n",
       "      <th>data</th>\n",
       "      <th>topic_0</th>\n",
       "      <th>topic_1</th>\n",
       "      <th>topic_2</th>\n",
       "      <th>topic_3</th>\n",
       "      <th>block_timestamp</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>block_number</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>12738509</th>\n",
       "      <td>147</td>\n",
       "      <td>147</td>\n",
       "      <td>147</td>\n",
       "      <td>147</td>\n",
       "      <td>147</td>\n",
       "      <td>147</td>\n",
       "      <td>147</td>\n",
       "      <td>147</td>\n",
       "      <td>128</td>\n",
       "      <td>118</td>\n",
       "      <td>13</td>\n",
       "      <td>147</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12738510</th>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>349</td>\n",
       "      <td>289</td>\n",
       "      <td>46</td>\n",
       "      <td>383</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12738511</th>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>383</td>\n",
       "      <td>354</td>\n",
       "      <td>284</td>\n",
       "      <td>33</td>\n",
       "      <td>383</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12738512</th>\n",
       "      <td>389</td>\n",
       "      <td>389</td>\n",
       "      <td>389</td>\n",
       "      <td>389</td>\n",
       "      <td>389</td>\n",
       "      <td>389</td>\n",
       "      <td>389</td>\n",
       "      <td>389</td>\n",
       "      <td>349</td>\n",
       "      <td>313</td>\n",
       "      <td>25</td>\n",
       "      <td>389</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12738513</th>\n",
       "      <td>425</td>\n",
       "      <td>425</td>\n",
       "      <td>425</td>\n",
       "      <td>425</td>\n",
       "      <td>425</td>\n",
       "      <td>425</td>\n",
       "      <td>425</td>\n",
       "      <td>425</td>\n",
       "      <td>380</td>\n",
       "      <td>332</td>\n",
       "      <td>43</td>\n",
       "      <td>425</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12739504</th>\n",
       "      <td>364</td>\n",
       "      <td>364</td>\n",
       "      <td>364</td>\n",
       "      <td>364</td>\n",
       "      <td>364</td>\n",
       "      <td>364</td>\n",
       "      <td>364</td>\n",
       "      <td>364</td>\n",
       "      <td>315</td>\n",
       "      <td>279</td>\n",
       "      <td>10</td>\n",
       "      <td>364</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12739505</th>\n",
       "      <td>387</td>\n",
       "      <td>387</td>\n",
       "      <td>387</td>\n",
       "      <td>387</td>\n",
       "      <td>387</td>\n",
       "      <td>387</td>\n",
       "      <td>387</td>\n",
       "      <td>387</td>\n",
       "      <td>344</td>\n",
       "      <td>291</td>\n",
       "      <td>22</td>\n",
       "      <td>387</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12739506</th>\n",
       "      <td>307</td>\n",
       "      <td>307</td>\n",
       "      <td>307</td>\n",
       "      <td>307</td>\n",
       "      <td>307</td>\n",
       "      <td>307</td>\n",
       "      <td>307</td>\n",
       "      <td>307</td>\n",
       "      <td>267</td>\n",
       "      <td>232</td>\n",
       "      <td>33</td>\n",
       "      <td>307</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12739507</th>\n",
       "      <td>367</td>\n",
       "      <td>367</td>\n",
       "      <td>367</td>\n",
       "      <td>367</td>\n",
       "      <td>367</td>\n",
       "      <td>367</td>\n",
       "      <td>367</td>\n",
       "      <td>367</td>\n",
       "      <td>327</td>\n",
       "      <td>276</td>\n",
       "      <td>24</td>\n",
       "      <td>367</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12739508</th>\n",
       "      <td>392</td>\n",
       "      <td>392</td>\n",
       "      <td>392</td>\n",
       "      <td>392</td>\n",
       "      <td>392</td>\n",
       "      <td>392</td>\n",
       "      <td>392</td>\n",
       "      <td>392</td>\n",
       "      <td>342</td>\n",
       "      <td>292</td>\n",
       "      <td>8</td>\n",
       "      <td>392</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>987 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              removed  log_index  transaction_index  transaction_hash  \\\n",
       "block_number                                                            \n",
       "12738509          147        147                147               147   \n",
       "12738510          383        383                383               383   \n",
       "12738511          383        383                383               383   \n",
       "12738512          389        389                389               389   \n",
       "12738513          425        425                425               425   \n",
       "...               ...        ...                ...               ...   \n",
       "12739504          364        364                364               364   \n",
       "12739505          387        387                387               387   \n",
       "12739506          307        307                307               307   \n",
       "12739507          367        367                367               367   \n",
       "12739508          392        392                392               392   \n",
       "\n",
       "              block_hash  address  data  topic_0  topic_1  topic_2  topic_3  \\\n",
       "block_number                                                                  \n",
       "12738509             147      147   147      147      128      118       13   \n",
       "12738510             383      383   383      383      349      289       46   \n",
       "12738511             383      383   383      383      354      284       33   \n",
       "12738512             389      389   389      389      349      313       25   \n",
       "12738513             425      425   425      425      380      332       43   \n",
       "...                  ...      ...   ...      ...      ...      ...      ...   \n",
       "12739504             364      364   364      364      315      279       10   \n",
       "12739505             387      387   387      387      344      291       22   \n",
       "12739506             307      307   307      307      267      232       33   \n",
       "12739507             367      367   367      367      327      276       24   \n",
       "12739508             392      392   392      392      342      292        8   \n",
       "\n",
       "              block_timestamp  \n",
       "block_number                   \n",
       "12738509                  147  \n",
       "12738510                  383  \n",
       "12738511                  383  \n",
       "12738512                  389  \n",
       "12738513                  425  \n",
       "...                       ...  \n",
       "12739504                  364  \n",
       "12739505                  387  \n",
       "12739506                  307  \n",
       "12739507                  367  \n",
       "12739508                  392  \n",
       "\n",
       "[987 rows x 12 columns]"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# aggregate data by block number\n",
    "%time\n",
    "ddemo_df.groupby('block_number').count().compute()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "0a6a6eb9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1 µs, sys: 1 µs, total: 2 µs\n",
      "Wall time: 4.77 µs\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0        0\n",
       "1        1\n",
       "2        2\n",
       "3        3\n",
       "4        4\n",
       "      ... \n",
       "593    593\n",
       "594    594\n",
       "595    595\n",
       "596    596\n",
       "597    597\n",
       "Name: log_index, Length: 598, dtype: int32"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# return Series of unique index logs\n",
    "%time\n",
    "ddemo_df['log_index'].unique().compute()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
